In [79]:
# Importing necessary libraries , rest of the libraries imported as and when needed
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import zscore
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix
In [2]:
df_vehicle=pd.read_csv('vehicle.csv')  # reading data file
In [3]:
df_vehicle.head()   # viewing first 5 records
Out[3]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus
In [4]:
df_vehicle.describe()    # exploring the statistical features
Out[4]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
count 846.000000 841.000000 842.000000 840.000000 844.000000 846.000000 845.000000 845.000000 843.000000 846.000000 843.000000 844.000000 844.000000 842.000000 840.000000 845.000000 845.000000 846.000000
mean 93.678487 44.828775 82.110451 168.888095 61.678910 8.567376 168.901775 40.933728 20.582444 147.998818 188.631079 439.494076 174.709716 72.447743 6.364286 12.602367 188.919527 195.632388
std 8.234474 6.152172 15.778292 33.520198 7.891463 4.601217 33.214848 7.816186 2.592933 14.515652 31.411004 176.666903 32.584808 7.486190 4.920649 8.936081 6.155809 7.438797
min 73.000000 33.000000 40.000000 104.000000 47.000000 2.000000 112.000000 26.000000 17.000000 118.000000 130.000000 184.000000 109.000000 59.000000 0.000000 0.000000 176.000000 181.000000
25% 87.000000 40.000000 70.000000 141.000000 57.000000 7.000000 147.000000 33.000000 19.000000 137.000000 167.000000 318.000000 149.000000 67.000000 2.000000 5.000000 184.000000 190.250000
50% 93.000000 44.000000 80.000000 167.000000 61.000000 8.000000 157.000000 43.000000 20.000000 146.000000 179.000000 363.500000 173.500000 71.500000 6.000000 11.000000 188.000000 197.000000
75% 100.000000 49.000000 98.000000 195.000000 65.000000 10.000000 198.000000 46.000000 23.000000 159.000000 217.000000 587.000000 198.000000 75.000000 9.000000 19.000000 193.000000 201.000000
max 119.000000 59.000000 112.000000 333.000000 138.000000 55.000000 265.000000 61.000000 29.000000 188.000000 320.000000 1018.000000 268.000000 135.000000 22.000000 41.000000 206.000000 211.000000
In [5]:
df_vehicle.shape
Out[5]:
(846, 19)
In [6]:
df_vehicle.columns.T  # Seeing column name in data frame
Out[6]:
Index(['compactness', 'circularity', 'distance_circularity', 'radius_ratio',
       'pr.axis_aspect_ratio', 'max.length_aspect_ratio', 'scatter_ratio',
       'elongatedness', 'pr.axis_rectangularity', 'max.length_rectangularity',
       'scaled_variance', 'scaled_variance.1', 'scaled_radius_of_gyration',
       'scaled_radius_of_gyration.1', 'skewness_about', 'skewness_about.1',
       'skewness_about.2', 'hollows_ratio', 'class'],
      dtype='object')
# There are total 18 columns
In [7]:
df_vehicle.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
compactness                    846 non-null int64
circularity                    841 non-null float64
distance_circularity           842 non-null float64
radius_ratio                   840 non-null float64
pr.axis_aspect_ratio           844 non-null float64
max.length_aspect_ratio        846 non-null int64
scatter_ratio                  845 non-null float64
elongatedness                  845 non-null float64
pr.axis_rectangularity         843 non-null float64
max.length_rectangularity      846 non-null int64
scaled_variance                843 non-null float64
scaled_variance.1              844 non-null float64
scaled_radius_of_gyration      844 non-null float64
scaled_radius_of_gyration.1    842 non-null float64
skewness_about                 840 non-null float64
skewness_about.1               845 non-null float64
skewness_about.2               845 non-null float64
hollows_ratio                  846 non-null int64
class                          846 non-null object
dtypes: float64(14), int64(4), object(1)
memory usage: 125.7+ KB
# 1 out of 18 column i.e. target column is not integer and hence later we need to convert object to numeric
In [8]:
df_vehicle.isna().sum()   # looking for missing values , so those can be imputed
Out[8]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64
In [9]:
#df_vehicle.isnull().sum()   # This is same as 'na' values
In [10]:
df_vehicle[df_vehicle['circularity'].isnull()==True]
Out[10]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
5 107 NaN 106.0 172.0 50.0 6 255.0 26.0 28.0 169 280.0 957.0 264.0 85.0 5.0 9.0 181.0 183 bus
105 108 NaN 103.0 202.0 64.0 10 220.0 30.0 25.0 168 NaN 711.0 214.0 73.0 11.0 NaN 188.0 199 car
118 85 NaN NaN 128.0 56.0 8 150.0 46.0 19.0 144 168.0 324.0 173.0 82.0 9.0 14.0 180.0 184 bus
266 86 NaN 65.0 116.0 53.0 6 152.0 45.0 19.0 141 175.0 335.0 NaN 85.0 5.0 4.0 179.0 183 bus
396 108 NaN 106.0 177.0 51.0 5 256.0 26.0 28.0 170 285.0 966.0 261.0 87.0 11.0 2.0 182.0 181 bus
In [11]:
df_vehicle['class'].unique()
Out[11]:
array(['van', 'car', 'bus'], dtype=object)
In [12]:
df_vehicle.groupby('class').count()  # checking number of values for each class
Out[12]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
class
bus 218 214 215 216 217 218 218 218 217 218 218 218 216 217 214 218 218 218
car 429 428 429 426 428 429 428 428 428 429 427 427 429 426 427 428 428 429
van 199 199 198 198 199 199 199 199 198 199 198 199 199 199 199 199 199 199
In [13]:
df_vehicle.groupby('class').median()  # seeing median for each feature respective to the class
Out[13]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
class
bus 89 44.0 72.0 167.5 64.0 6 152.0 44.0 19.0 145 177.0 344.0 176.0 76.0 5.0 10.0 186.0 189
car 97 46.0 94.0 186.0 61.0 9 185.0 36.0 22.0 150 206.0 512.0 182.0 70.0 6.0 14.0 189.0 198
van 90 42.0 75.0 144.0 59.0 9 142.0 47.0 18.0 145 164.0 300.0 159.0 72.0 6.0 9.0 188.0 196
In [57]:
df_vehicle.groupby('class').mean()  # seeing mean for each feature respective to the class
Out[57]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
class
0 91.591743 44.963303 76.701835 165.724771 62.926606 6.431193 170.022936 40.114679 20.573394 146.701835 192.889908 442.986239 180.986239 76.302752 4.798165 10.211009 187.811927 191.325688
1 96.184149 46.034965 88.878788 180.629371 60.993007 8.825175 181.062937 38.088578 21.512821 149.967366 197.932401 500.596737 179.613054 69.925408 6.699301 15.097902 189.473193 197.582751
2 90.562814 42.070352 73.256281 144.853015 59.577889 8.442211 141.537688 47.939698 18.572864 145.175879 163.256281 298.201005 157.276382 71.482412 6.417085 9.698492 188.939698 196.145729
Below is a code for imputing missing value, for each class and for each respective feature to class is imputed with the respective feature median.
In [15]:
for col in df_vehicle.columns:   
    if col == 'class':
        break
        
    df_vehicle[col]=df_vehicle.groupby('class')[col].transform(lambda x: x.fillna(x.median()))
In [16]:
#df_vehicle['circularity']=df_vehicle.groupby('class').circularity.transform(lambda x: x.fillna(x.median()))
In [17]:
df_vehicle.replace({'class': {'bus' : 0,'car' : 1,'van' : 2}},inplace=True)  # assiging numeric value to target class so object can be converted to numeric
df_vehicle['class']=pd.to_numeric(df_vehicle['class'])
df_vehicle.head(10)
Out[17]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 2
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 2
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 1
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 2
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 0
5 107 44.0 106.0 172.0 50.0 6 255.0 26.0 28.0 169 280.0 957.0 264.0 85.0 5.0 9.0 181.0 183 0
6 97 43.0 73.0 173.0 65.0 6 153.0 42.0 19.0 143 176.0 361.0 172.0 66.0 13.0 1.0 200.0 204 0
7 90 43.0 66.0 157.0 65.0 9 137.0 48.0 18.0 146 162.0 281.0 164.0 67.0 3.0 3.0 193.0 202 2
8 86 34.0 62.0 140.0 61.0 7 122.0 54.0 17.0 127 141.0 223.0 112.0 64.0 2.0 14.0 200.0 208 2
9 93 44.0 98.0 186.0 62.0 11 183.0 36.0 22.0 146 202.0 505.0 152.0 64.0 4.0 14.0 195.0 204 1
In [18]:
df_vehicle.boxplot(figsize=(30,20),);  # looking for boxplot,to determine outliers  in various features. 
# It is evident that there are high number of outliers in certain features
In [19]:
sns.boxplot(y='scaled_variance.1',x='class',data=df_vehicle)
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x19ac4bbee10>
In [20]:
sns.boxplot(y='scaled_variance',x='class',data=df_vehicle)
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x19ac4397320>
In [21]:
sns.boxplot(y='radius_ratio',x='class',data=df_vehicle)
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x19ac4427208>
In [22]:
sns.scatterplot(x='radius_ratio',y='class',data=df_vehicle)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x19ac4b8c940>
Below code is to replace the outliers with median value , hence from each feature outlier will be replaced by median value of that feature
In [23]:
for col in df_vehicle.columns:
    if col == 'class':
        continue
    
    quart75=np.percentile(df_vehicle[col],75)
    quart25=np.percentile(df_vehicle[col],25)
    iqr=quart75-quart25
    l_point=quart25-(1.5*iqr)
    u_point=quart75+(1.5*iqr)
    df_vehicle.loc[(df_vehicle[col] < l_point) | (df_vehicle[col] > u_point),col]=df_vehicle[col].median()
In [24]:
df_vehicle.boxplot(figsize=(50,20),); # Below boxplot show there are no outliers in the data now
In [25]:
#df_vehicle[df_vehicle['circularity'].isnull()==True]
In [26]:
#df_vehicle.iloc[[5,105,118],]
In [27]:
import seaborn as sns
sns.pairplot(df_vehicle,hue='class')
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\nonparametric\kde.py:487: RuntimeWarning: invalid value encountered in true_divide
  binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\nonparametric\kdetools.py:34: RuntimeWarning: invalid value encountered in double_scalars
  FAC1 = 2*(np.pi*bw/RANGE)**2
Out[27]:
<seaborn.axisgrid.PairGrid at 0x19ac4550fd0>
After closly analysing the above pair plot , it has been observed that few features show the problem has 3 classes and few of them show 4 classes. and the same is stated in the problem statement that it is hard to distinguish between cars.
In [28]:
df_vehicle.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
compactness                    846 non-null float64
circularity                    846 non-null float64
distance_circularity           846 non-null float64
radius_ratio                   846 non-null float64
pr.axis_aspect_ratio           846 non-null float64
max.length_aspect_ratio        846 non-null float64
scatter_ratio                  846 non-null float64
elongatedness                  846 non-null float64
pr.axis_rectangularity         846 non-null float64
max.length_rectangularity      846 non-null float64
scaled_variance                846 non-null float64
scaled_variance.1              846 non-null float64
scaled_radius_of_gyration      846 non-null float64
scaled_radius_of_gyration.1    846 non-null float64
skewness_about                 846 non-null float64
skewness_about.1               846 non-null float64
skewness_about.2               846 non-null float64
hollows_ratio                  846 non-null float64
class                          846 non-null int64
dtypes: float64(18), int64(1)
memory usage: 125.7 KB
In [29]:
X=df_vehicle.drop('class',axis=1) # Assinging all features to X except the target variable, hence demarcating 
Y=df_vehicle[['class']]  # Assiging only target feature to Y
In [30]:
Y.shape
Out[30]:
(846, 1)
In [31]:
X_std=X.apply(zscore)  # Standardizing the data using zscore
In [32]:
plt.figure(figsize=(15,10))
sns.heatmap(X_std.corr(),annot=True) #Plotting the correlation matrix depicting features relationship
Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0x19ad6c07a20>
# PCA - Applying PCA for feature reduction and increaing signal to noise ratio. Here we are trying to extract relationship between all data elements in a vector space
In [33]:
X_train_std,X_test_std,Y_train,Y_test=train_test_split(X_std,Y,test_size=0.25)
In [34]:
cov_matrix = np.cov(X_train_std.T) #Finidng covariance matrix
print('Covariance Matrix \n%s', cov_matrix)
Covariance Matrix 
%s [[ 0.97935045  0.65409495  0.77918853  0.72794362  0.20499092  0.50508183
   0.78965821 -0.77319692  0.79295921  0.65050349  0.74600094  0.79216042
   0.55273633 -0.26459347  0.2009576   0.17165577  0.29996095  0.37775028]
 [ 0.65409495  0.97455056  0.76500851  0.62113107  0.19718071  0.56628281
   0.81587724 -0.79514535  0.81676267  0.940933    0.75570761  0.80437541
   0.89297829  0.04651918  0.13043933 -0.01371965 -0.10442047  0.05928008]
 [ 0.77918853  0.76500851  0.99410626  0.79791291  0.23330345  0.6698257
   0.89045615 -0.9017116   0.8829063   0.75199474  0.85149769  0.8780288
   0.6773826  -0.26397441  0.08905075  0.28495258  0.15347288  0.35134769]
 [ 0.72794362  0.62113107  0.79791291  1.00023879  0.66727808  0.45414564
   0.77213662 -0.82907349  0.75007434  0.55816748  0.77842901  0.76997137
   0.52881666 -0.4095876   0.02850141  0.22242446  0.42861476  0.51180664]
 [ 0.20499092  0.19718071  0.23330345  0.66727808  1.0234134   0.11339709
   0.19947418 -0.30104166  0.17172578  0.13219896  0.2164894   0.20157454
   0.14363609 -0.31994328 -0.06960633  0.0122384   0.42221779  0.42039762]
 [ 0.50508183  0.56628281  0.6698257   0.45414564  0.11339709  1.00502482
   0.50792388 -0.51626329  0.51026801  0.65198747  0.41485366  0.4818261
   0.40718154 -0.32410937  0.0508669   0.15296705  0.0562233   0.39746404]
 [ 0.78965821  0.81587724  0.89045615  0.77213662  0.19947418  0.50792388
   0.97483043 -0.94874545  0.96442951  0.78723012  0.92389708  0.96093384
   0.76161915 -0.02779016  0.0668994   0.22713622  0.02083333  0.1471759 ]
 [-0.77319692 -0.79514535 -0.9017116  -0.82907349 -0.30104166 -0.51626329
  -0.94874545  0.98605268 -0.93173774 -0.75731792 -0.92115526 -0.93527907
  -0.73411605  0.11536837 -0.04499028 -0.20734232 -0.12981292 -0.24084315]
 [ 0.79295921  0.81676267  0.8829063   0.75007434  0.17172578  0.51026801
   0.96442951 -0.93173774  0.97675149  0.795552    0.91467719  0.95955743
   0.76314884 -0.00990596  0.07695665  0.22226586 -0.00348269  0.12964416]
 [ 0.65050349  0.940933    0.75199474  0.55816748  0.13219896  0.65198747
   0.78723012 -0.75731792  0.795552    0.98766429  0.71592527  0.77674178
   0.8421446   0.04116802  0.11679264 -0.00234214 -0.11279909  0.08213292]
 [ 0.74600094  0.75570761  0.85149769  0.77842901  0.2164894   0.41485366
   0.92389708 -0.92115526  0.91467719  0.71592527  0.94225404  0.91760638
   0.73212538 -0.01943854  0.04046293  0.2151291   0.04300752  0.12525494]
 [ 0.79216042  0.80437541  0.8780288   0.76997137  0.20157454  0.4818261
   0.96093384 -0.93527907  0.95955743  0.77674178  0.91760638  0.97733169
   0.75291654 -0.02526602  0.07063644  0.21508396  0.03335117  0.14913748]
 [ 0.55273633  0.89297829  0.6773826   0.52881666  0.14363609  0.40718154
   0.76161915 -0.73411605  0.76314884  0.8421446   0.73212538  0.75291654
   0.96407263  0.19526254  0.16573408 -0.06356528 -0.2217557  -0.1023553 ]
 [-0.26459347  0.04651918 -0.26397441 -0.4095876  -0.31994328 -0.32410937
  -0.02779016  0.11536837 -0.00990596  0.04116802 -0.01943854 -0.02526602
   0.19526254  0.97154859 -0.04856093 -0.14922547 -0.80808906 -0.87168335]
 [ 0.2009576   0.13043933  0.08905075  0.02850141 -0.06960633  0.0508669
   0.0668994  -0.04499028  0.07695665  0.11679264  0.04046293  0.07063644
   0.16573408 -0.04856093  0.98377589 -0.05566334  0.08657678  0.05183775]
 [ 0.17165577 -0.01371965  0.28495258  0.22242446  0.0122384   0.15296705
   0.22713622 -0.20734232  0.22226586 -0.00234214  0.2151291   0.21508396
  -0.06356528 -0.14922547 -0.05566334  1.00368046  0.09802926  0.23009906]
 [ 0.29996095 -0.10442047  0.15347288  0.42861476  0.42221779  0.0562233
   0.02083333 -0.12981292 -0.00348269 -0.11279909  0.04300752  0.03335117
  -0.2217557  -0.80808906  0.08657678  0.09802926  0.97531549  0.85938541]
 [ 0.37775028  0.05928008  0.35134769  0.51180664  0.42039762  0.39746404
   0.1471759  -0.24084315  0.12964416  0.08213292  0.12525494  0.14913748
  -0.1023553  -0.87168335  0.05183775  0.23009906  0.85938541  0.96530493]]
In [35]:
eig_vals, eig_vecs = np.linalg.eig(cov_matrix)  #Finiding eigen value and vector. There will be 14 eigen values and 14 list of eigen values
In [36]:
print('Eigen Vectors \n%s', eig_vecs)
print('\n Eigen Values \n%s', eig_vals)
Eigen Vectors 
%s [[-2.72083709e-01 -8.21733374e-02  5.67426828e-02  1.32360245e-01
  -1.40985582e-01  2.46958720e-01 -2.45498882e-01 -7.50062927e-01
  -3.90337847e-01  1.18145035e-01 -4.87524545e-02 -8.44978207e-03
  -1.06395330e-01 -4.81496831e-02 -1.01453950e-01  6.39182860e-03
   4.29323417e-02 -1.28063705e-02]
 [-2.81855732e-01  1.49882957e-01  1.98222326e-01 -4.34205013e-02
   1.41713763e-01 -1.02792262e-01  3.86886416e-01 -8.92282512e-02
  -6.63840105e-02 -2.21657752e-01 -5.33103617e-02  1.40621348e-01
  -4.88505429e-02 -5.23333544e-01  2.41196633e-01  5.39851695e-02
   5.97026885e-02  5.01546451e-01]
 [-3.04227147e-01 -3.96463266e-02 -7.13985320e-02  1.22160929e-01
   6.19225859e-02  2.50854499e-03 -1.35887540e-01  3.05189828e-01
  -2.04140326e-01  1.60120972e-01 -8.12263756e-01 -8.12910390e-03
   1.15278280e-01  6.77238446e-02  1.54266849e-01 -1.67320935e-02
  -3.57665450e-02 -4.73852943e-03]
 [-2.75653246e-01 -2.02429267e-01 -5.64519375e-02 -2.60289918e-01
  -1.15507310e-01 -9.87161068e-02 -1.54798556e-01  1.03870241e-01
  -1.82179982e-01 -5.24059999e-02  2.54296995e-01  3.92056221e-01
   4.44301555e-01 -2.75899905e-01  2.15369443e-01  4.46490284e-03
  -1.07970214e-01 -4.10322413e-01]
 [-1.01962468e-01 -2.69247748e-01  5.59732077e-02 -6.48193201e-01
  -7.97246981e-02 -5.45624036e-01 -1.44568368e-01 -1.36253047e-01
   4.16800758e-03 -4.94281718e-02 -1.24108256e-01 -1.85623197e-01
  -1.76063221e-01  1.75112639e-01 -8.84015390e-02 -8.30220434e-03
   3.08084072e-02  1.65772129e-01]
 [-2.02548608e-01 -7.27846343e-02  1.18190812e-01  3.26910523e-01
   5.95826547e-01 -3.21897449e-01 -4.23011582e-01 -2.01383971e-02
   1.64890509e-01  2.08953698e-01  2.52340046e-01  8.25672479e-02
  -8.44316476e-02  8.50988651e-02  1.75783763e-01  7.93539811e-03
   4.83699341e-03  9.03609960e-02]
 [-3.08621119e-01  7.61978328e-02 -1.06303972e-01  6.86815079e-04
  -8.62052810e-02  1.08105072e-01 -7.73289221e-02  7.19158100e-02
   1.33414653e-01 -1.31342348e-01  1.10437839e-01 -2.07678622e-01
  -1.04713461e-01 -4.80108770e-02  2.56554619e-02 -8.38613779e-01
  -2.19859035e-01  3.82067583e-02]
 [ 3.09380794e-01 -1.46618349e-02  8.96555144e-02  6.57008031e-02
   7.09835093e-02 -9.77517083e-02  7.32202876e-02 -2.00009613e-01
  -2.65807578e-01 -1.04727999e-01  2.69242162e-02 -1.05489075e-01
   6.21827304e-01  3.59343710e-01  2.63786690e-01 -2.17634765e-01
  -9.48749897e-02  3.10698546e-01]
 [-3.06686143e-01  8.94171486e-02 -9.87601100e-02  2.23698499e-02
  -8.22728983e-02  1.12501327e-01 -8.38293181e-02  2.26589718e-02
   1.13021979e-01 -2.07994268e-01  1.20851937e-01 -3.51861519e-01
   8.34543879e-02  1.13760181e-01  4.63539719e-02  4.76168071e-01
  -6.34839273e-01  1.20287614e-01]
 [-2.74961837e-01  1.48270180e-01  2.03351342e-01  4.52515594e-02
   2.46810869e-01 -1.09475040e-01  3.37602086e-01 -2.39657964e-01
   2.00727281e-01 -3.87968704e-01 -1.72684899e-01  2.16006668e-01
   8.27080665e-02  3.58745977e-01 -2.40132924e-01 -3.77748429e-02
  -1.53061179e-02 -3.90572047e-01]
 [-2.94629420e-01  6.83507829e-02 -1.34978921e-01 -6.24334416e-02
  -1.48044378e-01  1.52587104e-01 -6.27312660e-02  1.81054814e-01
   2.56660471e-02  1.29420858e-01  1.53263759e-01  4.55511926e-01
   1.49248358e-01  3.22871506e-01 -4.09070666e-01  1.89169908e-02
   1.26363120e-01  4.95896794e-01]
 [-3.06118225e-01  7.35352514e-02 -1.01380420e-01 -1.00858112e-02
  -1.09308544e-01  1.34584042e-01 -7.29896683e-02  3.82311732e-02
   1.54685853e-01 -1.89452439e-01  1.22442344e-01 -3.72467385e-01
   1.57850578e-01  1.45059558e-01  2.96944958e-01  1.29243758e-01
   6.97539069e-01 -5.20580345e-02]
 [-2.55146402e-01  2.26122292e-01  1.99752150e-01 -9.86638231e-02
   1.01237004e-02 -9.57563734e-02  4.37258412e-01  1.18357568e-01
  -3.02592614e-01  5.99817579e-01  2.38395201e-01 -2.33988398e-01
  -3.25071583e-02  1.66765935e-01  3.06727322e-02 -9.59789634e-03
  -3.61784215e-02 -1.59758189e-01]
 [ 5.61702541e-02  4.93177917e-01 -1.01774004e-01 -1.49711379e-01
  -1.52560817e-01 -1.22490144e-01 -1.12147784e-01 -3.26442380e-01
   5.20028943e-01  3.84677778e-01 -1.83924404e-01  7.14055610e-02
   2.68364340e-01 -1.49392248e-01  9.58195187e-02 -1.01662334e-02
  -4.82951657e-02 -1.18302567e-02]
 [-3.53711362e-02 -9.75292275e-04  5.50574008e-01  4.13275851e-01
  -6.07928571e-01 -3.36150792e-01 -1.22999601e-01  1.26158174e-01
   7.51008698e-02 -6.16294864e-02  1.00403882e-02  1.79494867e-02
   6.70481637e-03 -6.66062713e-03 -3.34725051e-02 -4.05812570e-04
   9.47039515e-03  6.25042383e-03]
 [-6.66641595e-02 -1.18556536e-01 -6.81283584e-01  3.77573199e-01
  -1.93142598e-01 -4.67357289e-01  2.95383353e-01 -1.33609017e-01
  -4.95303588e-02  7.02324589e-03  4.28483699e-02  4.50427300e-02
  -7.33283666e-02  2.27511719e-02  3.69005220e-02  1.27265657e-02
   1.04261578e-02 -8.21008974e-03]
 [-4.54525307e-02 -5.01184877e-01  1.05381502e-01 -3.00661118e-02
  -1.49082183e-01  2.70968562e-01  2.60877649e-01 -1.17208108e-01
   3.68149510e-01  2.00371433e-01 -3.48398167e-02  2.23116664e-01
  -1.71504055e-01  2.49943059e-01  4.73714171e-01 -6.89169195e-03
  -1.05478238e-01  5.61527505e-02]
 [-9.70153032e-02 -4.94735082e-01  6.72091953e-02  1.16709336e-01
   1.22452804e-01  3.27723412e-02  1.78792490e-01 -3.31348067e-02
   2.63506358e-01  1.89797923e-01 -4.02964920e-02 -3.19653749e-01
   4.13437689e-01 -3.06049605e-01 -4.47275797e-01 -1.85774033e-02
   5.09397017e-02  4.12289362e-02]]

 Eigen Values 
%s [9.56957043e+00 3.25915394e+00 1.18786721e+00 1.12980006e+00
 9.15787160e-01 6.27778928e-01 3.06484738e-01 2.29624452e-01
 1.24968763e-01 8.07973168e-02 6.24419963e-02 5.51214117e-02
 3.98848762e-02 2.97266721e-02 2.65544253e-02 8.21273717e-03
 1.59081783e-02 1.55835904e-02]
In [37]:
eigen_pairs = [(np.abs(eig_vals[i]), eig_vecs[ :, i]) for i in range(len(eig_vals))]  #Creating eigen pairs, at 0 index it will be eigen value and at index 1 it will be eigen vector
print(eigen_pairs)
[(9.569570425281333, array([-0.27208371, -0.28185573, -0.30422715, -0.27565325, -0.10196247,
       -0.20254861, -0.30862112,  0.30938079, -0.30668614, -0.27496184,
       -0.29462942, -0.30611822, -0.2551464 ,  0.05617025, -0.03537114,
       -0.06666416, -0.04545253, -0.0970153 ])), (3.2591539444561306, array([-0.08217334,  0.14988296, -0.03964633, -0.20242927, -0.26924775,
       -0.07278463,  0.07619783, -0.01466183,  0.08941715,  0.14827018,
        0.06835078,  0.07353525,  0.22612229,  0.49317792, -0.00097529,
       -0.11855654, -0.50118488, -0.49473508])), (1.1878672132036994, array([ 0.05674268,  0.19822233, -0.07139853, -0.05645194,  0.05597321,
        0.11819081, -0.10630397,  0.08965551, -0.09876011,  0.20335134,
       -0.13497892, -0.10138042,  0.19975215, -0.101774  ,  0.55057401,
       -0.68128358,  0.1053815 ,  0.0672092 ])), (1.129800059636205, array([ 0.13236025, -0.0434205 ,  0.12216093, -0.26028992, -0.6481932 ,
        0.32691052,  0.00068682,  0.0657008 ,  0.02236985,  0.04525156,
       -0.06243344, -0.01008581, -0.09866382, -0.14971138,  0.41327585,
        0.3775732 , -0.03006611,  0.11670934])), (0.9157871599701598, array([-0.14098558,  0.14171376,  0.06192259, -0.11550731, -0.0797247 ,
        0.59582655, -0.08620528,  0.07098351, -0.0822729 ,  0.24681087,
       -0.14804438, -0.10930854,  0.0101237 , -0.15256082, -0.60792857,
       -0.1931426 , -0.14908218,  0.1224528 ])), (0.6277789276569754, array([ 0.24695872, -0.10279226,  0.00250854, -0.09871611, -0.54562404,
       -0.32189745,  0.10810507, -0.09775171,  0.11250133, -0.10947504,
        0.1525871 ,  0.13458404, -0.09575637, -0.12249014, -0.33615079,
       -0.46735729,  0.27096856,  0.03277234])), (0.30648473803194193, array([-0.24549888,  0.38688642, -0.13588754, -0.15479856, -0.14456837,
       -0.42301158, -0.07732892,  0.07322029, -0.08382932,  0.33760209,
       -0.06273127, -0.07298967,  0.43725841, -0.11214778, -0.1229996 ,
        0.29538335,  0.26087765,  0.17879249])), (0.2296244522954111, array([-0.75006293, -0.08922825,  0.30518983,  0.10387024, -0.13625305,
       -0.0201384 ,  0.07191581, -0.20000961,  0.02265897, -0.23965796,
        0.18105481,  0.03823117,  0.11835757, -0.32644238,  0.12615817,
       -0.13360902, -0.11720811, -0.03313481])), (0.12496876256478326, array([-0.39033785, -0.06638401, -0.20414033, -0.18217998,  0.00416801,
        0.16489051,  0.13341465, -0.26580758,  0.11302198,  0.20072728,
        0.02566605,  0.15468585, -0.30259261,  0.52002894,  0.07510087,
       -0.04953036,  0.36814951,  0.26350636])), (0.08079731683817794, array([ 0.11814504, -0.22165775,  0.16012097, -0.052406  , -0.04942817,
        0.2089537 , -0.13134235, -0.104728  , -0.20799427, -0.3879687 ,
        0.12942086, -0.18945244,  0.59981758,  0.38467778, -0.06162949,
        0.00702325,  0.20037143,  0.18979792])), (0.06244199630495922, array([-0.04875245, -0.05331036, -0.81226376,  0.254297  , -0.12410826,
        0.25234005,  0.11043784,  0.02692422,  0.12085194, -0.1726849 ,
        0.15326376,  0.12244234,  0.2383952 , -0.1839244 ,  0.01004039,
        0.04284837, -0.03483982, -0.04029649])), (0.05512141166418448, array([-0.00844978,  0.14062135, -0.0081291 ,  0.39205622, -0.1856232 ,
        0.08256725, -0.20767862, -0.10548908, -0.35186152,  0.21600667,
        0.45551193, -0.37246739, -0.2339884 ,  0.07140556,  0.01794949,
        0.04504273,  0.22311666, -0.31965375])), (0.03988487620268687, array([-0.10639533, -0.04885054,  0.11527828,  0.44430156, -0.17606322,
       -0.08443165, -0.10471346,  0.6218273 ,  0.08345439,  0.08270807,
        0.14924836,  0.15785058, -0.03250716,  0.26836434,  0.00670482,
       -0.07332837, -0.17150406,  0.41343769])), (0.029726672145352098, array([-0.04814968, -0.52333354,  0.06772384, -0.27589991,  0.17511264,
        0.08509887, -0.04801088,  0.35934371,  0.11376018,  0.35874598,
        0.32287151,  0.14505956,  0.16676594, -0.14939225, -0.00666063,
        0.02275117,  0.24994306, -0.3060496 ])), (0.026554425333315236, array([-0.10145395,  0.24119663,  0.15426685,  0.21536944, -0.08840154,
        0.17578376,  0.02565546,  0.26378669,  0.04635397, -0.24013292,
       -0.40907067,  0.29694496,  0.03067273,  0.09581952, -0.03347251,
        0.03690052,  0.47371417, -0.4472758 ])), (0.008212737165175912, array([ 6.39182860e-03,  5.39851695e-02, -1.67320935e-02,  4.46490284e-03,
       -8.30220434e-03,  7.93539811e-03, -8.38613779e-01, -2.17634765e-01,
        4.76168071e-01, -3.77748429e-02,  1.89169908e-02,  1.29243758e-01,
       -9.59789634e-03, -1.01662334e-02, -4.05812570e-04,  1.27265657e-02,
       -6.89169195e-03, -1.85774033e-02])), (0.015908178308774116, array([ 0.04293234,  0.05970269, -0.03576654, -0.10797021,  0.03080841,
        0.00483699, -0.21985903, -0.09487499, -0.63483927, -0.01530612,
        0.12636312,  0.69753907, -0.03617842, -0.04829517,  0.0094704 ,
        0.01042616, -0.10547824,  0.0509397 ])), (0.0155835903925348, array([-0.01280637,  0.50154645, -0.00473853, -0.41032241,  0.16577213,
        0.090361  ,  0.03820676,  0.31069855,  0.12028761, -0.39057205,
        0.49589679, -0.05205803, -0.15975819, -0.01183026,  0.00625042,
       -0.00821009,  0.05615275,  0.04122894]))]
In [38]:
eigen_pairs.sort(key=lambda x: x[0], reverse=True)
In [39]:
# Visually confirm that the list is correctly sorted by decreasing eigenvalues
print('Eigenvalues in descending order:')
for i in eigen_pairs:
    print(i[0])

eigen_pairs
Eigenvalues in descending order:
9.569570425281333
3.2591539444561306
1.1878672132036994
1.129800059636205
0.9157871599701598
0.6277789276569754
0.30648473803194193
0.2296244522954111
0.12496876256478326
0.08079731683817794
0.06244199630495922
0.05512141166418448
0.03988487620268687
0.029726672145352098
0.026554425333315236
0.015908178308774116
0.0155835903925348
0.008212737165175912
Out[39]:
[(9.569570425281333,
  array([-0.27208371, -0.28185573, -0.30422715, -0.27565325, -0.10196247,
         -0.20254861, -0.30862112,  0.30938079, -0.30668614, -0.27496184,
         -0.29462942, -0.30611822, -0.2551464 ,  0.05617025, -0.03537114,
         -0.06666416, -0.04545253, -0.0970153 ])),
 (3.2591539444561306,
  array([-0.08217334,  0.14988296, -0.03964633, -0.20242927, -0.26924775,
         -0.07278463,  0.07619783, -0.01466183,  0.08941715,  0.14827018,
          0.06835078,  0.07353525,  0.22612229,  0.49317792, -0.00097529,
         -0.11855654, -0.50118488, -0.49473508])),
 (1.1878672132036994,
  array([ 0.05674268,  0.19822233, -0.07139853, -0.05645194,  0.05597321,
          0.11819081, -0.10630397,  0.08965551, -0.09876011,  0.20335134,
         -0.13497892, -0.10138042,  0.19975215, -0.101774  ,  0.55057401,
         -0.68128358,  0.1053815 ,  0.0672092 ])),
 (1.129800059636205,
  array([ 0.13236025, -0.0434205 ,  0.12216093, -0.26028992, -0.6481932 ,
          0.32691052,  0.00068682,  0.0657008 ,  0.02236985,  0.04525156,
         -0.06243344, -0.01008581, -0.09866382, -0.14971138,  0.41327585,
          0.3775732 , -0.03006611,  0.11670934])),
 (0.9157871599701598,
  array([-0.14098558,  0.14171376,  0.06192259, -0.11550731, -0.0797247 ,
          0.59582655, -0.08620528,  0.07098351, -0.0822729 ,  0.24681087,
         -0.14804438, -0.10930854,  0.0101237 , -0.15256082, -0.60792857,
         -0.1931426 , -0.14908218,  0.1224528 ])),
 (0.6277789276569754,
  array([ 0.24695872, -0.10279226,  0.00250854, -0.09871611, -0.54562404,
         -0.32189745,  0.10810507, -0.09775171,  0.11250133, -0.10947504,
          0.1525871 ,  0.13458404, -0.09575637, -0.12249014, -0.33615079,
         -0.46735729,  0.27096856,  0.03277234])),
 (0.30648473803194193,
  array([-0.24549888,  0.38688642, -0.13588754, -0.15479856, -0.14456837,
         -0.42301158, -0.07732892,  0.07322029, -0.08382932,  0.33760209,
         -0.06273127, -0.07298967,  0.43725841, -0.11214778, -0.1229996 ,
          0.29538335,  0.26087765,  0.17879249])),
 (0.2296244522954111,
  array([-0.75006293, -0.08922825,  0.30518983,  0.10387024, -0.13625305,
         -0.0201384 ,  0.07191581, -0.20000961,  0.02265897, -0.23965796,
          0.18105481,  0.03823117,  0.11835757, -0.32644238,  0.12615817,
         -0.13360902, -0.11720811, -0.03313481])),
 (0.12496876256478326,
  array([-0.39033785, -0.06638401, -0.20414033, -0.18217998,  0.00416801,
          0.16489051,  0.13341465, -0.26580758,  0.11302198,  0.20072728,
          0.02566605,  0.15468585, -0.30259261,  0.52002894,  0.07510087,
         -0.04953036,  0.36814951,  0.26350636])),
 (0.08079731683817794,
  array([ 0.11814504, -0.22165775,  0.16012097, -0.052406  , -0.04942817,
          0.2089537 , -0.13134235, -0.104728  , -0.20799427, -0.3879687 ,
          0.12942086, -0.18945244,  0.59981758,  0.38467778, -0.06162949,
          0.00702325,  0.20037143,  0.18979792])),
 (0.06244199630495922,
  array([-0.04875245, -0.05331036, -0.81226376,  0.254297  , -0.12410826,
          0.25234005,  0.11043784,  0.02692422,  0.12085194, -0.1726849 ,
          0.15326376,  0.12244234,  0.2383952 , -0.1839244 ,  0.01004039,
          0.04284837, -0.03483982, -0.04029649])),
 (0.05512141166418448,
  array([-0.00844978,  0.14062135, -0.0081291 ,  0.39205622, -0.1856232 ,
          0.08256725, -0.20767862, -0.10548908, -0.35186152,  0.21600667,
          0.45551193, -0.37246739, -0.2339884 ,  0.07140556,  0.01794949,
          0.04504273,  0.22311666, -0.31965375])),
 (0.03988487620268687,
  array([-0.10639533, -0.04885054,  0.11527828,  0.44430156, -0.17606322,
         -0.08443165, -0.10471346,  0.6218273 ,  0.08345439,  0.08270807,
          0.14924836,  0.15785058, -0.03250716,  0.26836434,  0.00670482,
         -0.07332837, -0.17150406,  0.41343769])),
 (0.029726672145352098,
  array([-0.04814968, -0.52333354,  0.06772384, -0.27589991,  0.17511264,
          0.08509887, -0.04801088,  0.35934371,  0.11376018,  0.35874598,
          0.32287151,  0.14505956,  0.16676594, -0.14939225, -0.00666063,
          0.02275117,  0.24994306, -0.3060496 ])),
 (0.026554425333315236,
  array([-0.10145395,  0.24119663,  0.15426685,  0.21536944, -0.08840154,
          0.17578376,  0.02565546,  0.26378669,  0.04635397, -0.24013292,
         -0.40907067,  0.29694496,  0.03067273,  0.09581952, -0.03347251,
          0.03690052,  0.47371417, -0.4472758 ])),
 (0.015908178308774116,
  array([ 0.04293234,  0.05970269, -0.03576654, -0.10797021,  0.03080841,
          0.00483699, -0.21985903, -0.09487499, -0.63483927, -0.01530612,
          0.12636312,  0.69753907, -0.03617842, -0.04829517,  0.0094704 ,
          0.01042616, -0.10547824,  0.0509397 ])),
 (0.0155835903925348,
  array([-0.01280637,  0.50154645, -0.00473853, -0.41032241,  0.16577213,
          0.090361  ,  0.03820676,  0.31069855,  0.12028761, -0.39057205,
          0.49589679, -0.05205803, -0.15975819, -0.01183026,  0.00625042,
         -0.00821009,  0.05615275,  0.04122894])),
 (0.008212737165175912,
  array([ 6.39182860e-03,  5.39851695e-02, -1.67320935e-02,  4.46490284e-03,
         -8.30220434e-03,  7.93539811e-03, -8.38613779e-01, -2.17634765e-01,
          4.76168071e-01, -3.77748429e-02,  1.89169908e-02,  1.29243758e-01,
         -9.59789634e-03, -1.01662334e-02, -4.05812570e-04,  1.27265657e-02,
         -6.89169195e-03, -1.85774033e-02]))]
In [40]:
tot = sum(eig_vals)
var_exp = [( i /tot ) * 100 for i in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
print("Cumulative Variance Explained", cum_var_exp)
Cumulative Variance Explained [ 54.11041002  72.53904875  79.2557538   85.64412253  90.82237155
  94.37209988  96.10509458  97.40348862  98.11011501  98.56697731
  98.92005084  99.23173068  99.45725669  99.6253439   99.77549388
  99.86544547  99.9535617  100.        ]
In [41]:
plt.figure(figsize=(10 , 7))
plt.bar(range(18), var_exp, alpha = 0.5, align = 'center', label = 'Individual explained variance')
plt.step(range(18), cum_var_exp, where='mid', label = 'Cumulative explained variance')
plt.ylabel('Explained Variance Ratio')
plt.xlabel('Principal Components')
plt.legend(loc = 'best')
plt.tight_layout()
plt.show()
In [44]:
eigvalues_sort = [eigen_pairs[index][0] for index in range(len(eig_vals))]
eigvectors_sort = [eigen_pairs[index][1] for index in range(len(eig_vals))]
# COnsidering 12 PC values
In [45]:
P_reduce = np.array(eigvectors_sort[0:12]).transpose()   # Selecting first 12 eigen vectors of all 18

Proj_train_data = np.dot(X_train_std,P_reduce)   # projecting training data onto the eight eigen vectors

Proj_test_data = np.dot(X_test_std,P_reduce)    # projecting test data onto the eight eigen vectors
In [47]:
# complete matrix operation 
print(Y_train.shape)
X_train_std.shape, P_reduce.shape, Proj_train_data.shape
(634, 1)
Out[47]:
((634, 18), (18, 12), (634, 12))

SVM

In [69]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
In [70]:
svc_model=SVC()
parameters={'kernel' : ('rbf','linear'),
            'C' : [0.01, 0.05, 0.5, 1]}
In [71]:
clf = GridSearchCV(svc_model, parameters)
In [72]:
clf.fit(X_train_std,Y_train)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
Out[72]:
GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [0.01, 0.05, 0.5, 1],
                         'kernel': ('rbf', 'linear')},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)
In [73]:
print("best hyperparameter from the values iserted {}".format(clf.best_estimator_))
best hyperparameter from the values iserted SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)
In [74]:
sorted(clf.cv_results_.keys())
Out[74]:
['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'param_C',
 'param_kernel',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split1_test_score',
 'split2_test_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score']
In [75]:
print ("Best score on training set {}".format(clf.best_score_))
Best score on training set 0.9511041009463722
In [78]:
prediction=clf.predict(X_test_std)  #Prediction on X_test
In [80]:
print("printing confusion matrix")
confusion_matrix(Y_test,prediction)
printing confusion matrix
Out[80]:
array([[ 51,   1,   1],
       [  2, 107,   4],
       [  1,   7,  38]], dtype=int64)